# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
from scrapy.exceptions import DropItem


class DuplicatePipeline(object):
    """Item pipeline that discards items whose ``scholarId`` was already seen.

    Only items coming from the spider named ``"scholarInfo"`` are checked;
    items from every other spider are passed through untouched. When adding
    de-duplication for another spider, branch on ``spider.name`` here.
    """

    def __init__(self):
        # ``scholarId`` values this pipeline instance has already let through.
        self.ids_seen = set()

    def process_item(self, item, spider):
        """Pass ``item`` on, or reject it if its ``scholarId`` is a repeat.

        Args:
            item: The scraped item; read via ``item['scholarId']`` when the
                spider is ``"scholarInfo"``.
            spider: The spider that produced the item; only its ``name``
                attribute is consulted.

        Returns:
            The same ``item`` object, unmodified.

        Raises:
            DropItem: If the spider is ``"scholarInfo"`` and the item's
                ``scholarId`` has been seen before.
        """
        # Spiders other than "scholarInfo" are not de-duplicated.
        if spider.name != "scholarInfo":
            return item

        scholar_id = item['scholarId']
        if scholar_id in self.ids_seen:
            raise DropItem("Duplicate item found: %s" % item)

        # First occurrence: remember the id so later copies are dropped.
        self.ids_seen.add(scholar_id)
        return item
